In [1]:
import os
import anndata
import sys
import numpy as np
wd = '/home/clarice/Documents/SingleCell_PseudoTime/'
os.chdir(wd)
sys.path.append('extras/Stabilized_ICA')
In [2]:
#-- Read data
sc200 = anndata.read_h5ad('data/sc200CL_pp.h5ad')

ref_cell_line = 'JHU011_UPPER_AERODIGESTIVE_TRACT'

Preprocessing

In [3]:
import scycle as cc

cc.pp.prep_pooling(sc200)
Preparing embedding...
38919 samples pass the count filter
42362  samples pass the mt filter
Samples selected 38919
/home/clarice/anaconda3/lib/python3.7/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
Embedding for pooling...
Pooling 38919 samples...
/home/clarice/anaconda3/lib/python3.7/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead

On this warning: Nothing I can do about it as of now, it's an internal error from anndata

In [4]:
cc.pp.score_cell_cycle(sc200)
-- Scoring G1 genes...
/home/clarice/anaconda3/lib/python3.7/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
-- Scoring S-phase...
WARNING: genes are not in var_names and ignored: ['UHRF1', 'ORC1', 'ALG6', 'CENPI', 'OGN', 'CASP8AP2', 'FAM178A', 'C20orf72', 'OMD', 'ANKRD32', 'TRIM45', 'DNA2', 'C4orf21', 'FAM54A', 'MLF1IP', 'EME1', 'ZNF367', 'C11orf82', 'KIAA1731', 'C18orf54', 'TMEM194A', 'MTBP', 'MBOAT1', 'ZNF519', 'LIN9', 'KNTC1', 'ZNF93', 'ZNF107', 'TMEM116', 'DDX12P', 'SNHG1']
/home/clarice/anaconda3/lib/python3.7/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
-- Scoring G2-M...
WARNING: genes are not in var_names and ignored: ['TMEM48', 'CENPI', 'C15orf23', 'ESPL1', 'MLF1IP', 'C11orf82', 'C18orf54', 'TMEM194A', 'GEN1', 'TRAIP', 'ERCC6L', 'ARHGAP19', 'FAM72D', 'RP11-303E16.2', 'CTD-2510F5.4']
/home/clarice/anaconda3/lib/python3.7/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
-- Scoring Histones...
Found histone genes: H2AFZ H3F3B H3F3A H2AFV H2AFJ H2AFY H1FX H1F0 H2AFX H2AFY2
-- Scalling signatures...

Integration

Here we run the first steps of the pipeline on the reference:

In [5]:
scref = sc200[sc200.obs['Cell_line'] == ref_cell_line]
cc.tl.dimensionality_reduction(scref, method = 'ica', n_comps = 30, seed = 0)
cc.tl.enrich_components(scref)
-- Dimensionality reduction using ICA...
-- Done
--- Selected components:
G1/S: 0
G2/M: 1
G2/M-: 5
Histones: 6

And here comes the OT integration implemented by Aziz:

In [6]:
cc.tl.integration(sc200, scref)
/home/clarice/anaconda3/lib/python3.7/site-packages/anndata/_core/anndata.py:1094: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead

Finding the trajectory

In [7]:
cc.tl.principal_circle(sc200)
The initial number of nodes must be at least 3. This will be fixed
Generating the initial configuration
Creating a circle in the plane induced by the 1st and 2nd PCs with 3 nodes
90% of the points have been used as initial conditions. Resetting.
Constructing tree 1 of 1 / Subset 1 of 1
Performing PCA
Using standard PCA
4 dimensions are being used
100.0 % of the original variance has been retained
The elastic matrix is being used. Edge configuration will be ignored
Computing EPG with  30  nodes on  38919  points and  4  dimensions
BARCODE	ENERGY	NNODES	NEDGES	NRIBS	NSTARS	NRAYS	NRAYS2	MSE	MSEP	FVE	FVEP	UE	UR	URN	URN2	URSD

0||4	64365.8884	4	4	4	0	0	0	44695.7973	41557.6823	0.6623	0.686	3173.847	16496.244	65984.976	263939.9042	0
0||5	51640.5964	5	5	5	0	0	0	32993.6515	30232.4719	0.7507	0.7716	3852.6619	14794.283	73971.415	369857.0752	0
0||6	43083.3782	6	6	6	0	0	0	26415.2045	23694.5608	0.8004	0.821	4111.115	12557.0588	75342.3528	452054.1165	0
0||7	37126.6884	7	7	7	0	0	0	22146.2515	19202.264	0.8327	0.8549	4328.0312	10652.4057	74566.8399	521967.8796	0
0||8	32698.4978	8	8	8	0	0	0	19431.1297	16563.146	0.8532	0.8748	4301.8633	8965.5048	71724.0385	573792.3078	0
0||9	28874.0751	9	9	9	0	0	0	17308.1552	14543.8224	0.8692	0.8901	4133.5502	7432.3696	66891.3264	602021.9378	0
0||10	25721.3256	10	10	10	0	0	0	15007.6918	12796.8595	0.8866	0.9033	4059.9613	6653.6725	66536.7246	665367.2465	0
0||11	22897.9489	11	11	11	0	0	0	12994.4359	10803.3807	0.9018	0.9184	4086.3232	5817.1898	63989.0877	703879.9647	0
0||12	20831.4921	12	12	12	0	0	0	11414.495	9524.6268	0.9138	0.928	4105.14	5311.8571	63742.285	764907.4206	0
0||13	19017.8981	13	13	13	0	0	0	10183.0046	8366.9294	0.9231	0.9368	4141.6636	4693.2299	61011.9888	793155.8546	0
0||14	17426.2316	14	14	14	0	0	0	9340.9373	7716.5845	0.9294	0.9417	3973.5185	4111.7757	57564.8604	805908.0455	0
0||15	16245.4755	15	15	15	0	0	0	8739.5735	7185.3467	0.934	0.9457	3864.1594	3641.7426	54626.1393	819392.0896	0
0||16	15231.3636	16	16	16	0	0	0	8134.528	6697.8718	0.9385	0.9494	3801.0	3295.8357	52733.3713	843733.9413	0
0||17	14343.1169	17	17	17	0	0	0	7706.132	6370.7692	0.9418	0.9519	3695.2319	2941.753	50009.8007	850166.6123	0
0||18	13500.3733	18	18	18	0	0	0	7287.0984	6009.9411	0.9449	0.9546	3575.4942	2637.7806	47480.0517	854640.9299	0
0||19	12870.6989	19	19	19	0	0	0	7014.0247	5858.9355	0.947	0.9557	3429.4622	2427.2121	46117.0302	876223.5742	0
0||20	12233.3854	20	20	20	0	0	0	6678.8224	5576.6729	0.9495	0.9579	3359.2594	2195.3036	43906.0711	878121.4212	0
0||21	11688.1179	21	21	21	0	0	0	6424.733	5385.3384	0.9515	0.9593	3260.3937	2002.9912	42062.8153	883319.1218	0
0||22	11159.6118	22	22	22	0	0	0	6148.1161	5132.9444	0.9535	0.9612	3198.2685	1813.2272	39890.9988	877601.9742	0
0||23	10785.79	23	23	23	0	0	0	5989.9544	5061.4124	0.9547	0.9618	3084.0118	1711.8237	39371.9448	905554.7308	0
0||24	10399.0644	24	24	24	0	0	0	5824.0726	4926.4548	0.956	0.9628	2997.2456	1577.7462	37865.9089	908781.8126	0
0||25	10053.2224	25	25	25	0	0	0	5656.3613	4788.9779	0.9573	0.9638	2932.555	1464.3061	36607.6537	915191.3434	0
0||26	9739.2917	26	26	26	0	0	0	5481.874	4680.2822	0.9586	0.9646	2871.3821	1386.0356	36036.9244	936960.034	0
0||27	9349.2959	27	27	27	0	0	0	5318.8775	4575.01	0.9598	0.9654	2783.493	1246.9253	33666.9836	909008.5566	0
0||28	9063.3057	28	28	28	0	0	0	5149.6817	4470.107	0.9611	0.9662	2729.286	1184.338	33161.4647	928521.0119	0
0||29	8795.2575	29	29	29	0	0	0	5027.2039	4368.9478	0.962	0.967	2671.1843	1096.8692	31809.208	922467.0315	0
0||30	8578.808	30	30	30	0	0	0	4910.4124	4286.4278	0.9629	0.9676	2631.112	1037.2837	31118.5102	933555.3045	0
5.2069  seconds elapsed
In [8]:
cc.pl.scatter_projection(sc200, col_var = 'Cancer_type', trajectory = True)
/home/clarice/anaconda3/lib/python3.7/site-packages/plotnine/utils.py:1246: FutureWarning: is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead
Out[8]:
<ggplot: (8768636921069)>
In [9]:
cc.pl.scatter_projection3d(sc200, col_var = 'Cancer_type', alpha = 0.3,
                           trajectory = True, size = 3, node_color = 'black', palette = 'Accent')

Find cell division moment

This is the clunkiest part of this pipeline, we may actually benefit from the alternative normalization based on the trajectory even if it was just to streamline this part.

In [10]:
cc.tl.celldiv_moment(sc200, var = 'total_counts')
cc.pl.scatter_projection(sc200, trajectory = True)
Suggested moment of cell division: [16 17]
Direction of cell cycle: 1
Out[10]:
<ggplot: (8768955894765)>

Total counts are kind of all over the place here. But there seems to be a bit of a gap around node 10?

In [11]:
cc.pl.scatter_projection(sc200, col_var = 'G1/S_score', trajectory = True)
Out[11]:
<ggplot: (8768918312665)>
In [12]:
cc.pl.scatter_projection(sc200, col_var = 'G2-M', trajectory = True)
Out[12]:
<ggplot: (8768918385137)>
In [13]:
cc.tl.remap_nodes(sc200, celldiv_edge = [10,11], cycle_direction = -1)
Remapping edges using [10, 11] ...
In [14]:
cc.tl.pseudotime(sc200)
Calculating pseudotimes for each cell...
In [15]:
cc.pl.hist_pseudotime(sc200)
Out[15]:
<ggplot: (8768912649845)>
In [16]:
cc.pl.scatter_cell_cycle(sc200)
/home/clarice/anaconda3/lib/python3.7/site-packages/plotnine/utils.py:1246: FutureWarning:

is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead

Out[16]:
<ggplot: (8768913428749)>
In [17]:
cc.pl.scatter_cell_cycle(sc200, scores = 'components')
/home/clarice/anaconda3/lib/python3.7/site-packages/plotnine/utils.py:1246: FutureWarning:

is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead

Out[17]:
<ggplot: (8768912538889)>

Dividing the cell cycle

Of note, cc.tl.cell_cycle_phase already uses the curvature settings to find the cell cycle divisions.

In [18]:
cc.tl.cell_cycle_phase(sc200)
(30, 4)
-- Suggested cell cycle division:
G1:  0   - 0.3
S:  0.3 - 0.6666666666666666
G2: 0.6666666666666666 - 0.8666666666666667
M:  0.8666666666666667 -   1
In [19]:
cc.pl.scatter_cell_cycle(sc200, scores = 'components', alpha = 0.1)
/home/clarice/anaconda3/lib/python3.7/site-packages/plotnine/utils.py:1246: FutureWarning:

is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead

/home/clarice/anaconda3/lib/python3.7/site-packages/plotnine/layer.py:464: PlotnineWarning:

geom_vline : Removed 1 rows containing missing values.

Out[19]:
<ggplot: (8768918285437)>
In [20]:
cc.tl.annotate_cell_cycle(sc200)
In [21]:
cc.pl.barplot_cycle_phase(sc200)
/home/clarice/anaconda3/lib/python3.7/site-packages/plotnine/utils.py:1246: FutureWarning:

is_categorical is deprecated and will be removed in a future version.  Use is_categorical_dtype instead

Out[21]:
<ggplot: (8768918282237)>

Considering making this a piechart? I'd use our alternative plotting system with plotly for that since plotnine doesn't currently support polar coordinates, unfortunetely.

In [23]:
cc.pl.scatter_projection3d(sc200, col_var = 'cell_cycle_phase', size = 3)